/********************************************************************
Name: Finding Control Group Size That Minimizes Forward 
	Prediction Error, Synthetic Control (Figure A5)
Author: Dan Thompson
Date: September 2021
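Description: Using pre-1980 data only, fits a synthetic control for the
	treated unit (rank 1) with a placebo treatment period of 1976, once
	for each control-pool size from 25 to 900 counties (steps of 5).
	Stores the 1976 treated-minus-synthetic gap for each pool size and
	plots its absolute value against the number of counties.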

********************************************************************/

* Start from an empty session, then set the matrix-size and
* variable-count limits used by the rest of the script
clear all
set matsize 11000
set maxvar 15000

* Root folder of the project; every file path below is built from $path
global path "~/Dropbox/Mariel Effects"


* Build the placebo sample: load only observations before 1980, declare
* the data as a panel (unit = rank, time = year), and keep a copy in a
* temporary file so it can be reloaded later in the script
use if year < 1980 using "$path/Replication/data/county_analysis_data.dta", clear
tsset rank year
tempfile data
save `data'

* Control-pool sizes to evaluate. A pool of size i is every unit with
* rank <= i (unit 1 is the treated unit); sizes run from min_pool to
* max_pool in increments of pool_step.
local min_pool  = 25
local pool_step = 5
local max_pool  = 900

* Set up a matrix to store placebo effect estimates.
* Row i holds the estimate for a pool of i counties, so the matrix needs
* max_pool rows; rows for sizes that are skipped or fail stay missing.
matrix define Output = J(`max_pool', 1, .)

* Loop over the size of the control pool
qui forval i = `min_pool'(`pool_step')`max_pool' {

	* Bring in the analysis data for the relevant control pool
	use if rank<=`i' using `data', clear

	* Create the synthetic control, matching on the outcome in each
	* pre-period (1960-1972) and treating 1976 as the placebo period.
	* -capture- keeps one failed fit from aborting the whole run.
	cap synth rep_vs rep_vs(1960) rep_vs(1964) rep_vs(1968) rep_vs(1972), ///
		trunit(1) trperiod(1976)
	local rc = _rc
	if `rc' != 0 {
		* Report which pool size failed and why, then move on
		noi di as error "synth with `i' counties error, rc=`rc' (max `max_pool') $S_TIME"
		continue
	}

	* Call the output into some matrices.
	* NOTE(review): row 5 is taken to be the 1976 outcome, which assumes
	* the pre-1980 panel has exactly the periods 1960, 1964, 1968, 1972
	* and 1976 -- confirm against the data.
	matrix Treat = e(Y_treated)
	matrix Synth = e(Y_synthetic)
	matrix Output[`i',1] = Treat[5,1] - Synth[5,1]

	* Print a status update
	noi di as text "synth with `i' counties complete (max `max_pool') $S_TIME"
}

* Turn the Output matrix into a data set. The loop stored the estimate
* for a pool of i counties in row i, so the observation number is the
* pool size. Rows that were never filled are still missing and are
* removed.
clear
svmat Output
generate num_counties = _n
rename Output1 effect
keep if effect != .

* Write the formatted estimates to disk, overwriting any earlier copy
save "$path/Replication/modified_data/prediction_synth.dta", replace

* Run the figure (Fig A5): absolute 1976 prediction error plotted against
* the number of counties in the control pool, with dashed reference
* lines at 250 and 555 counties.
use "$path/Replication/modified_data/prediction_synth.dta", clear
generate abs_effect = abs(effect)

* The x variable is written out in full (num_counties, not the
* abbreviation num) so the command also runs under -set varabbrev off-
* and stays unambiguous if another num* variable is ever added.
twoway (line abs_effect num_counties, lcolor(gs2) lwidth(vthin)), ///
	ytitle("Abs(Prediction Error) (1976)") ///
	xtitle("Number of Counties") ///
	xline(250, lcolor(gs8) lpattern(shortdash)) ///
	xline(555, lcolor(gs8) lpattern(shortdash)) ///
	graphregion(color(white)) ///
	name("optimize_counties_synth", replace)

* Export the named graph to PDF, overwriting any earlier copy
graph export "$path/Replication/output/prediction_synth.pdf", ///
	replace name("optimize_counties_synth")
	
